FirmAI

Estimated reading time: 6 minutes

from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd

output_notebook()

<div class="bk-root">
    <a href="https://bokeh.pydata.org" target="_blank" class="bk-logo bk-logo-small bk-logo-notebook"></a>
    <span id="0aa7ba42-8fd8-4e98-a271-d0a17bbf6005">Loading BokehJS ...</span>
</div>

# Create the initial dataframe
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
df = pd.DataFrame(data=data,index=index)

# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()

df

	amount
sales	350000
returns	-30000
credit fees	-7500
rebates	-25000
late charges	95000
shipping	-7000

# Create additional columns that we will use to build the waterfall
df['running_total'] = df['amount'].cumsum()
df['y_start'] = df['running_total'] - df['amount']

# Where do we want to place the label
df['label_pos'] = df['running_total']

df

	amount	running_total	y_start	label_pos
sales	350000	350000	0	350000
returns	-30000	320000	350000	320000
credit fees	-7500	312500	320000	312500
rebates	-25000	287500	312500	287500
late charges	95000	382500	287500	382500
shipping	-7000	375500	382500	375500

# We need to have a net column at the end with the totals and a full bar
df_net = pd.DataFrame.from_records([(net, net, 0, net)], 
                                   columns=['amount', 'running_total', 'y_start', 'label_pos'],
                                   index=["net"])
df = df.append(df_net)

df

	amount	running_total	y_start	label_pos
sales	350000	350000	0	350000
returns	-30000	320000	350000	320000
credit fees	-7500	312500	320000	312500
rebates	-25000	287500	312500	287500
late charges	95000	382500	287500	382500
shipping	-7000	375500	382500	375500
net	375500	375500	0	375500

# We want to color the positive values gray and the negative red
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'

# The 10000 factor is used to make the text positioned correctly.
# You will need to modify if the values are significantly different
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000
df["bar_label"] = df["amount"].map('{:,.0f}'.format)

df

	amount	running_total	y_start	label_pos	color	bar_label
sales	350000	350000	0	350000	grey	350,000
returns	-30000	320000	350000	310000	red	-30,000
credit fees	-7500	312500	320000	302500	red	-7,500
rebates	-25000	287500	312500	277500	red	-25,000
late charges	95000	382500	287500	382500	grey	95,000
shipping	-7000	375500	382500	365500	red	-7,000
net	375500	375500	0	375500	grey	375,500

# Build the Bokeh figure

# Limit the tools to only these three
TOOLS = "box_zoom,reset,save"

# Build the source data off the df dataframe
source = ColumnDataSource(df)

# Create the figure and assign range values that look good for the data set
p = figure(tools=TOOLS, x_range=list(df.index), y_range=(0, net+40000), plot_width=800, title = "Sales Waterfall")
p.grid.grid_line_alpha=0.3

# Add the segments
p.segment(x0='index', y0='y_start', x1="index", y1='running_total', source=source, color="color", line_width=55)

# Format the y-axis as dollars
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"

# Add the labels
labels = LabelSet(x='index', y='label_pos', text='bar_label', text_font_size="8pt", level='glyph',
         x_offset=-20, y_offset=0, source=source)
p.add_layout(labels)

show(p)

/Users/dereksnow/anaconda/envs/py36/lib/python3.6/site-packages/bokeh/core/json_encoder.py:80: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
  elif np.issubdtype(type(obj), np.float):